{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:47.891366Z",
     "start_time": "2018-07-26T06:19:40.994084Z"
    }
   },
   "outputs": [],
   "source": [
    "import gc, os\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "sys.path.append(f'/home/{os.environ.get(\"USER\")}/PythonLibrary')\n",
    "import lgbextension as ex\n",
    "import lightgbm as lgb\n",
    "from multiprocessing import cpu_count, Pool\n",
    "from glob import glob\n",
    "import utils, utils_cat\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:48.060977Z",
     "start_time": "2018-07-26T06:19:47.894133Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-rw-rw-r-- 1 Kazuki Kazuki 13K Jul 22 01:34 \u001b[0m\u001b[01;31m../output/722-1_X.csv.gz\u001b[0m\r\n",
      "-rw-rw-r-- 1 Kazuki Kazuki 14K Jul 22 03:54 \u001b[01;31m../output/722-2_X.csv.gz\u001b[0m\r\n",
      "-rw-rw-r-- 1 Kazuki Kazuki 17K Jul 24 21:28 \u001b[01;31m../output/725-1_X.csv.gz\u001b[0m\r\n",
      "-rw-rw-r-- 1 Kazuki Kazuki 15K Jul 25 05:24 \u001b[01;31m../output/725-2_X.csv.gz\u001b[0m\r\n",
      "-rw-rw-r-- 1 Kazuki Kazuki 17K Jul 25 12:16 \u001b[01;31m../output/725-3_X.csv.gz\u001b[0m\r\n",
      "-rw-rw-r-- 1 Kazuki Kazuki 13K Jul 25 15:51 \u001b[01;31m../output/725-4_X.csv.gz\u001b[0m\r\n"
     ]
    }
   ],
   "source": [
    "ls -lh ../output/*X*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:48.245010Z",
     "start_time": "2018-07-26T06:19:48.063241Z"
    }
   },
   "outputs": [],
   "source": [
    "df_lb802 = pd.read_csv('../output/722-2_X.csv.gz')\n",
    "df_lb800 = pd.read_csv('../output/725-1_X.csv.gz')\n",
    "df_lb804 = pd.read_csv('../output/725-2_X.csv.gz')\n",
    "df_lb803 = pd.read_csv('../output/725-4_X.csv.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:48.443153Z",
     "start_time": "2018-07-26T06:19:48.247312Z"
    }
   },
   "outputs": [],
   "source": [
    "sub_lb802 = pd.read_csv('../output/722-2.csv.gz')\n",
    "sub_lb800 = pd.read_csv('../output/725-1.csv.gz')\n",
    "sub_lb804 = pd.read_csv('../output/725-2.csv.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:49.102302Z",
     "start_time": "2018-07-26T06:19:48.445194Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9889504335682618"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub_lb804.TARGET.corr(sub_lb800.TARGET)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:49.107887Z",
     "start_time": "2018-07-26T06:19:49.104132Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((5, 700), (5, 601))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lb800.shape, df_lb804.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:19:49.170865Z",
     "start_time": "2018-07-26T06:19:49.109583Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "459"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len( set(df_lb800.columns) & set( df_lb804.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:20:55.562212Z",
     "start_time": "2018-07-26T06:20:55.557945Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len( set(df_lb804.columns) - set( df_lb803.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:21:34.326969Z",
     "start_time": "2018-07-26T06:21:34.322545Z"
    }
   },
   "outputs": [],
   "source": [
    "f_804_m_803 = list( set(df_lb804.columns) - set( df_lb803.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:44:25.368923Z",
     "start_time": "2018-07-26T06:44:25.363946Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f302_notdelay_days_weighted_delay_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT_mean',\n",
       " 'f701_all_credit_std-dby-income',\n",
       " 'f501_AMT_CREDIT_SUM_OVERDUE-d-app_AMT_GOODS_PRICE_max',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_mean',\n",
       " 'f501_CREDIT_TYPE_Consumer-credit_mean',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f308_cons_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f105_future_payment_21m_min',\n",
       " 'f405_AMT_INST_MIN_REGULARITY_diff_diff_mean',\n",
       " 'f308_delay_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f308_cons_DAYS_INSTALMENT_pctchange_mean',\n",
       " 'f001_AMT_CREDIT-d-CNT_CHILDREN',\n",
       " 'f303_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f407_AMT_DRAWINGS_OTHER_CURRENT_diff_mean',\n",
       " 'f313_amt_ratio_pctchange_sum',\n",
       " 'f501_AMT_CREDIT_SUM_max',\n",
       " 'f109_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL',\n",
       " 'f507_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_sum',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f101_approved_DAYS_FIRST_DRAWING-d-app_DAYS_BIRTH_mean',\n",
       " 'f101_nyg-high_RATE_DOWN_PAYMENT_var',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_min',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_mean',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f308_cons_days_delayed_payment_pctchange_min',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f504_DAYS_CREDIT_max',\n",
       " 'f502_AMT_CREDIT_MAX_OVERDUE_max',\n",
       " 'f201_CNT_INSTALMENT-m-CNT_INSTALMENT_FUTURE_var',\n",
       " 'f101_completed_AMT_CREDIT-d-total_debt_max',\n",
       " 'f101_approved_DAYS_FIRST_DUE-d-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f101_completed_AMT_APPLICATION-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f105_total_debt_sum-p-app-d-income',\n",
       " 'f314_days_weighted_delay_sum',\n",
       " 'f101_approved_DAYS_LAST_DUE-d-app_DAYS_BIRTH_mean',\n",
       " 'f109_days_trm-m-ldue1',\n",
       " 'f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_sum',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_max',\n",
       " 'f307_cons_DEP_diff_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_mean',\n",
       " 'f310_notdelay_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f101_completed_AMT_DOWN_PAYMENT_var',\n",
       " 'f101_nyg-low_normal_DAYS_DECISION_min',\n",
       " 'f310_DBD_mean',\n",
       " 'f308_delay_cas_days_weighted_delay_tsw3_diff_mean',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT',\n",
       " 'f314_days_weighted_delay_diff_mean',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f103_active_DAYS_LAST_DUE_1ST_VERSION-m-app_DAYS_EMPLOYED_mean',\n",
       " 'f302_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f101_nyg-high_amt_paid_sum',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_ID_PUBLISH',\n",
       " 'f109_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f301_AMT_PAYMENT-d-AMT_ANNUITY_mean',\n",
       " 'f001_AMT_ANNUITY-d-CNT_CHILDREN',\n",
       " 'f701_all_credit-prevact_min',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f103_approved_HOUR_APPR_PROCESS_START_var',\n",
       " 'f305_delay_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_REGISTRATION_max',\n",
       " 'f506_CREDIT_ACTIVE_Sold_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f103_active_AMT_APPLICATION-d-app_AMT_ANNUITY_min',\n",
       " 'f106_CHANNEL_TYPE-Country-wide_appref',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f203_CNT_INSTALMENT_FUTURE-d-CNT_INSTALMENT_var',\n",
       " 'f314_days_weighted_delay_pctchange_min',\n",
       " 'f201_CNT_INSTALMENT-m-CNT_INSTALMENT_FUTURE_mean',\n",
       " 'f505_Active_AMT_CREDIT_MAX_OVERDUE_mean',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_BIRTH',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_max',\n",
       " 'f302_DPD_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM_min',\n",
       " 'f201_NAME_CONTRACT_STATUS_Active_sum',\n",
       " 'f109_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-total_debt_min',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f310_delay_days_weighted_delay_max',\n",
       " 'f305_delay_cas_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f108_AMT_CREDIT-m-app_AMT_INCOME_TOTAL_pctchange',\n",
       " 'f305_notdelay_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f305_con_NUM_INSTALMENT_ratio_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_max',\n",
       " 'f508_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE_max',\n",
       " 'f106_NAME_PAYMENT_TYPE-XNA_ref',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_pctchange',\n",
       " 'f001_building_score_mode_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_var',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_mean',\n",
       " 'f106_NAME_PRODUCT_TYPE-XNA_appref']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f_804_m_803"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:23:18.176171Z",
     "start_time": "2018-07-26T06:23:18.169948Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "401"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len( set(df_lb800.columns) & set(df_lb803.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:24:04.937713Z",
     "start_time": "2018-07-26T06:24:04.931971Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "501"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len( set(df_lb804.columns) & set(df_lb803.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:39:15.723066Z",
     "start_time": "2018-07-26T06:39:15.715771Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f302_notdelay_days_weighted_delay_sum',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-AMT_ANNUITY_mean',\n",
       " 'f509_DAYS_ENDDATE_FACT-m-app_DAYS_REGISTRATION',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT_mean',\n",
       " 'f701_all_credit_std-dby-income',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_mean',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_REGISTRATION_min',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_CREDIT',\n",
       " 'f308_cons_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH',\n",
       " 'f308_delay_cons_DPD_diff_var',\n",
       " 'f101_approved_DAYS_FIRST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f105_future_payment_21m_min',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_ANNUITY',\n",
       " 'f106_NAME_PRODUCT_TYPE-XNA_appref',\n",
       " 'f405_AMT_INST_MIN_REGULARITY_diff_diff_mean',\n",
       " 'f308_delay_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f510_AMT_CREDIT_SUM',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM',\n",
       " 'f503_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f507_Active_DAYS_CREDIT-m-app_DAYS_BIRTH_sum',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_diff',\n",
       " 'f312_days_weighted_delay_pctchange_max',\n",
       " 'f509_AMT_CREDIT_SUM-d-app_AMT_ANNUITY',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_max',\n",
       " 'f302_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f509_AMT_CREDIT_MAX_OVERDUE',\n",
       " 'f101_nyg-high_AMT_DOWN_PAYMENT_max',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_max',\n",
       " 'f303_DPD_max',\n",
       " 'f001_AMT_CREDIT-d-CNT_CHILDREN',\n",
       " 'f301_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f509_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f314_days_weighted_delay_diff_min',\n",
       " 'f303_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f313_amt_ratio_pctchange_sum',\n",
       " 'f501_AMT_CREDIT_SUM_max',\n",
       " 'f302_notdelay_NUM_INSTALMENT_ratio_max',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f507_Active_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_min',\n",
       " 'f310_notdelay_DBD_mean',\n",
       " 'f109_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL',\n",
       " 'f507_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_sum',\n",
       " 'f306_cas_DEP_diff_max',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f301_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f101_approved_DAYS_FIRST_DRAWING-d-app_DAYS_BIRTH_mean',\n",
       " 'f509_SK_ID_BUREAU',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT',\n",
       " 'f105_approved_ratio',\n",
       " 'f103_nyg-low_normal_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f302_days_weighted_delay_sum',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_ID_PUBLISH',\n",
       " 'f101_approved_total_debt_var',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f308_cons_days_delayed_payment_pctchange_min',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_sum',\n",
       " 'f308_cas_days_delayed_payment_diff_mean',\n",
       " 'f314_days_weighted_delay_var',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED',\n",
       " 'f101_approved_DAYS_LAST_DUE-d-app_DAYS_BIRTH_mean',\n",
       " 'f509_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT',\n",
       " 'f509_DAYS_CREDIT_UPDATE_pctchange',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_mean',\n",
       " 'f510_SK_ID_BUREAU',\n",
       " 'f504_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f308_delay_cons_days_weighted_delay_tsw3_diff_max',\n",
       " 'f507_Active_AMT_CREDIT_SUM_sum',\n",
       " 'f310_notdelay_NUM_INSTALMENT_ratio_mean',\n",
       " 'f301_DAYS_ENTRY_PAYMENT_max',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f101_completed_AMT_DOWN_PAYMENT_var',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f101_completed_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f310_DBD_mean',\n",
       " 'f312_days_delayed_payment_diff_mean',\n",
       " 'f101_nyg-high_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f312_days_delayed_payment_pctchange_min',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT',\n",
       " 'f302_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_CREDIT_min',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff',\n",
       " 'f101_nyg-high_amt_paid_sum',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f101_approved_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_BIRTH_var',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_mean',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT',\n",
       " 'f308_cons_NUM_INSTALMENT_ratio_diff_max',\n",
       " 'f305_con_AMT_PAYMENT-d-app_AMT_ANNUITY_max',\n",
       " 'f701_all_credit-prevact_min',\n",
       " 'f509_CREDIT_TYPE',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f101_approved_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f405_CNT_DRAWINGS_CURRENT_var',\n",
       " 'f507_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f501_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f506_CREDIT_ACTIVE_Sold_mean',\n",
       " 'f509_AMT_CREDIT_SUM',\n",
       " 'f106_CHANNEL_TYPE-Country-wide_appref',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f203_CNT_INSTALMENT_FUTURE-d-CNT_INSTALMENT_var',\n",
       " 'f510_CREDIT_TYPE',\n",
       " 'f310_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_BIRTH',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_max',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f510_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT',\n",
       " 'f508_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_mean',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_var',\n",
       " 'f109_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_var',\n",
       " 'f001_building_score_mode_sum',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f509_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL',\n",
       " 'f305_con_AMT_PAYMENT_min',\n",
       " 'f310_delay_days_weighted_delay_max',\n",
       " 'f510_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff',\n",
       " 'f305_delay_cas_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f108_AMT_GOODS_PRICE-d-AMT_CREDIT_diff',\n",
       " 'f601_Closed_STATUS_X_var',\n",
       " 'f305_notdelay_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f101_nyg-middle_AMT_ANNUITY-d-app_AMT_ANNUITY_max',\n",
       " 'f508_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f308_cons_days_weighted_delay_diff_mean',\n",
       " 'f101_completed_AMT_APPLICATION-d-app_AMT_CREDIT_max',\n",
       " 'f508_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE_max',\n",
       " 'f502_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f106_NAME_PAYMENT_TYPE-XNA_ref',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_EMPLOYED-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_pctchange',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_EMPLOYED',\n",
       " 'f101_approved_AMT_ANNUITY-m-app_AMT_ANNUITY_max']"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list( set(df_lb804.columns) - set(df_lb800.columns) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T07:26:48.386395Z",
     "start_time": "2018-07-26T07:26:48.381976Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_REGISTRATION-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f101_completed_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f101_completed_DAYS_FIRST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f506_DAYS_CREDIT-m-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION']"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[c for c in df_lb802.columns if '_ID_' in c]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Only LB 0.800 (this might cause overfitting)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T03:13:34.802916Z",
     "start_time": "2018-07-26T03:13:34.744165Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f001_AMT_ANNUITY-d-AMT_INCOME_TOTAL',\n",
       " 'f001_AMT_CREDIT-d-cnt_adults',\n",
       " 'f001_AMT_GOODS_PRICE-d-cnt_adults',\n",
       " 'f001_DAYS_EMPLOYED-d-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-d-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED',\n",
       " 'f001_ENTRANCES_AVG',\n",
       " 'f001_LIVINGAREA_AVG',\n",
       " 'f001_LIVINGAREA_MODE',\n",
       " 'f101_active_AMT_ANNUITY-d-app_AMT_ANNUITY_max',\n",
       " 'f101_approved_AMT_DOWN_PAYMENT_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f101_approved_CNT_PAYMENT_mean',\n",
       " 'f101_approved_DAYS_DECISION_max',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_EMPLOYED_min',\n",
       " 'f101_approved_RATE_DOWN_PAYMENT_max',\n",
       " 'f101_approved_cnt_paid_var',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_CREDIT_max',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f101_completed_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f101_completed_AMT_APPLICATION-d-app_AMT_GOODS_PRICE_max',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_mean',\n",
       " 'f101_completed_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_RATE_DOWN_PAYMENT_max',\n",
       " 'f101_nyg-high_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f101_nyg-high_AMT_GOODS_PRICE-d-total_debt_min',\n",
       " 'f101_nyg-high_DAYS_DECISION_mean',\n",
       " 'f101_nyg-high_HOUR_APPR_PROCESS_START_max',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE_1ST_VERSION-m-app_DAYS_BIRTH_max',\n",
       " 'f101_nyg-middle_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_nyg-middle_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL_min',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-total_debt_mean',\n",
       " 'f101_refused_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f101_refused_DAYS_DECISION_max',\n",
       " 'f101_refused_DAYS_DECISION_var',\n",
       " 'f102_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f102_approved_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_max',\n",
       " 'f103_approved_AMT_CREDIT-d-total_debt_var',\n",
       " 'f103_approved_DAYS_FIRST_DUE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f104_approved_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f105_AMT_ANNUITY_cmp_sum',\n",
       " 'f105_past_payment_15m_max',\n",
       " 'f105_prevapp_future_payment_12m',\n",
       " 'f105_prevapp_future_payment_20m',\n",
       " 'f105_prevapp_future_payment_21m',\n",
       " 'f105_total_debt_sum-p-app',\n",
       " 'f106_NAME_PAYMENT_TYPE-Cash-through-the-bank_appref',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_ANNUITY',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_GOODS_PRICE',\n",
       " 'f108_DAYS_FIRST_DUE-d-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f108_DAYS_FIRST_DUE-m-app_DAYS_BIRTH',\n",
       " 'f109_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_DOWN_PAYMENT',\n",
       " 'f109_AMT_GOODS_PRICE-d-total_debt',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_EMPLOYED',\n",
       " 'f109_past_payment_10m',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_mean',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_var',\n",
       " 'f203_CNT_INSTALMENT_FUTURE_pctchange_mean',\n",
       " 'f301_AMT_PAYMENT_min',\n",
       " 'f301_DPD_mean',\n",
       " 'f301_days_weighted_delay_mean',\n",
       " 'f301_delay_DAYS_ENTRY_PAYMENT-m-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f301_delay_DPD_mean',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f301_notdelay_amt_ratio_mean',\n",
       " 'f301_notdelay_days_weighted_delay_mean',\n",
       " 'f302_NUM_INSTALMENT_ratio_mean',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f302_delay_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f302_notdelay_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f302_notdelay_NUM_INSTALMENT_ratio_mean',\n",
       " 'f303_AMT_PAYMENT-d-AMT_ANNUITY_min',\n",
       " 'f303_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f303_DAYS_ENTRY_PAYMENT-m-app_DAYS_EMPLOYED_max',\n",
       " 'f303_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f303_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_days_weighted_delay_mean',\n",
       " 'f303_notdelay_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_notdelay_days_weighted_delay_sum',\n",
       " 'f305_cas_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f305_con_AMT_PAYMENT-d-app_AMT_CREDIT_mean',\n",
       " 'f305_con_days_weighted_delay_min',\n",
       " 'f305_delay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-AMT_ANNUITY_var',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-app_AMT_CREDIT_mean',\n",
       " 'f305_notdelay_con_DBD_mean',\n",
       " 'f305_notdelay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f306_cas_DEP_diff_var',\n",
       " 'f306_cons_DEP_diff_min',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_mean',\n",
       " 'f308_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f308_cons_DBD_pctchange_var',\n",
       " 'f308_notdelay_rev_AMT_PAYMENT-d-AMT_ANNUITY_pctchange_var',\n",
       " 'f309_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f309_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f310_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var',\n",
       " 'f310_notdelay_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var',\n",
       " 'f312_days_delayed_payment_diff_var',\n",
       " 'f312_days_delayed_payment_pctchange_mean',\n",
       " 'f312_days_weighted_delay_sum',\n",
       " 'f313_days_delayed_payment_pctchange_min',\n",
       " 'f313_days_delayed_payment_pctchange_var',\n",
       " 'f313_days_weighted_delay_pctchange_mean',\n",
       " 'f313_days_weighted_delay_pctchange_min',\n",
       " 'f313_days_weighted_delay_pctchange_var',\n",
       " 'f314_days_delayed_payment_pctchange_mean',\n",
       " 'f402_AMT_DRAWINGS_ATM_CURRENT_diff_pctchange_var',\n",
       " 'f406_AMT_RECEIVABLE_PRINCIPAL_pctchange_pctchange_min',\n",
       " 'f501_AMT_CREDIT_MAX_OVERDUE_var',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_CREDIT_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-debt-p-AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_var',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_var',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_mean',\n",
       " 'f501_AMT_CREDIT_SUM_OVERDUE-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f501_AMT_CREDIT_SUM_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_BIRTH_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_BIRTH_min',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_EMPLOYED_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_var',\n",
       " 'f501_DAYS_CREDIT-m-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f501_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_sum',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_mean',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATION_max',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_var',\n",
       " 'f501_DAYS_CREDIT_UPDATE_pctchange_var',\n",
       " 'f501_DAYS_CREDIT_min',\n",
       " 'f501_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_min',\n",
       " 'f501_DAYS_ENDDATE_FACT-m-app_DAYS_BIRTH_min',\n",
       " 'f502_AMT_CREDIT_MAX_OVERDUE_mean',\n",
       " 'f502_AMT_CREDIT_MAX_OVERDUE_sum',\n",
       " 'f502_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f502_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f502_AMT_CREDIT_SUM-d-debt-p-AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_var',\n",
       " 'f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max',\n",
       " 'f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_sum',\n",
       " 'f502_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_min',\n",
       " 'f502_AMT_CREDIT_SUM_DEBT-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f502_AMT_CREDIT_SUM_max',\n",
       " 'f502_AMT_CREDIT_SUM_sum',\n",
       " 'f502_DAYS_CREDIT-m-app_DAYS_BIRTH_sum',\n",
       " 'f502_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f502_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f502_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_mean',\n",
       " 'f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f503_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_max',\n",
       " 'f503_AMT_CREDIT_SUM_sum',\n",
       " 'f503_AMT_CREDIT_SUM_var',\n",
       " 'f503_DAYS_CREDIT-d-app_DAYS_BIRTH_min',\n",
       " 'f503_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f503_DAYS_CREDIT_max',\n",
       " 'f503_DAYS_ENDDATE_FACT-d-app_DAYS_BIRTH_min',\n",
       " 'f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_min',\n",
       " 'f504_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_mean',\n",
       " 'f504_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f504_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_min',\n",
       " 'f505_Active_AMT_CREDIT_MAX_OVERDUE_pctchange_max',\n",
       " 'f505_Active_AMT_CREDIT_MAX_OVERDUE_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-debt-p-AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_diff_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM_diff_mean',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_BIRTH_min',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_var',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_REGISTRATION_min',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_BIRTH_mean',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_REGISTRATION_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_max',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_min',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_var',\n",
       " 'f505_Active_DAYS_CREDIT_max',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_min',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_sum',\n",
       " 'f505_Closed_AMT_CREDIT_SUM_min',\n",
       " 'f505_Closed_DAYS_CREDIT-d-app_DAYS_BIRTH_min',\n",
       " 'f505_Closed_DAYS_CREDIT-d-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_var',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_mean',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATION_sum',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_sum',\n",
       " 'f506_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_max',\n",
       " 'f506_DAYS_CREDIT-m-app_DAYS_BIRTH_sum',\n",
       " 'f601_Closed_MONTHS_BALANCE_sum',\n",
       " 'f602_Active_MONTHS_BALANCE_mean',\n",
       " 'f701_all_credit-prevact_min-dby-income',\n",
       " 'f701_all_credit_min']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lb800.columns.difference(df_lb804.columns).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T03:13:36.484859Z",
     "start_time": "2018-07-26T03:13:36.481660Z"
    }
   },
   "outputs": [],
   "source": [
    "feature_ovf = df_lb800.columns.difference(df_lb804.columns).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T03:13:46.618319Z",
     "start_time": "2018-07-26T03:13:46.615056Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "241"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(feature_ovf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Only in LB .804 (these are good features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T17:20:53.374588Z",
     "start_time": "2018-07-25T17:20:53.369258Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f001_AMT_CREDIT-d-CNT_CHILDREN',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_EMPLOYED-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f001_building_score_mode_sum',\n",
       " 'f101_approved_AMT_ANNUITY-m-app_AMT_ANNUITY_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_var',\n",
       " 'f101_approved_DAYS_FIRST_DRAWING-d-app_DAYS_BIRTH_mean',\n",
       " 'f101_approved_DAYS_FIRST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_approved_DAYS_LAST_DUE-d-app_DAYS_BIRTH_mean',\n",
       " 'f101_approved_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_approved_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_BIRTH_var',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_REGISTRATION_min',\n",
       " 'f101_approved_total_debt_var',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f101_completed_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_completed_AMT_APPLICATION-d-app_AMT_CREDIT_max',\n",
       " 'f101_completed_AMT_DOWN_PAYMENT_var',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f101_nyg-high_AMT_DOWN_PAYMENT_max',\n",
       " 'f101_nyg-high_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f101_nyg-high_amt_paid_sum',\n",
       " 'f101_nyg-middle_AMT_ANNUITY-d-app_AMT_ANNUITY_max',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f103_nyg-low_normal_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f105_approved_ratio',\n",
       " 'f105_future_payment_21m_min',\n",
       " 'f106_CHANNEL_TYPE-Country-wide_appref',\n",
       " 'f106_NAME_PAYMENT_TYPE-XNA_ref',\n",
       " 'f106_NAME_PRODUCT_TYPE-XNA_appref',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_pctchange',\n",
       " 'f108_AMT_GOODS_PRICE-d-AMT_CREDIT_diff',\n",
       " 'f109_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_BIRTH',\n",
       " 'f203_CNT_INSTALMENT_FUTURE-d-CNT_INSTALMENT_var',\n",
       " 'f301_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f301_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f301_DAYS_ENTRY_PAYMENT_max',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-AMT_ANNUITY_mean',\n",
       " 'f302_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f302_days_weighted_delay_sum',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_CREDIT_min',\n",
       " 'f302_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f302_notdelay_NUM_INSTALMENT_ratio_max',\n",
       " 'f302_notdelay_days_weighted_delay_sum',\n",
       " 'f303_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f303_DPD_max',\n",
       " 'f305_con_AMT_PAYMENT-d-app_AMT_ANNUITY_max',\n",
       " 'f305_con_AMT_PAYMENT_min',\n",
       " 'f305_delay_cas_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f305_notdelay_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f306_cas_DEP_diff_max',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f308_cas_days_delayed_payment_diff_mean',\n",
       " 'f308_cons_NUM_INSTALMENT_NUMBER_pctchange_var',\n",
       " 'f308_cons_NUM_INSTALMENT_ratio_diff_max',\n",
       " 'f308_cons_days_delayed_payment_pctchange_min',\n",
       " 'f308_cons_days_weighted_delay_diff_mean',\n",
       " 'f308_delay_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f308_delay_cons_DPD_diff_var',\n",
       " 'f308_delay_cons_days_weighted_delay_tsw3_diff_max',\n",
       " 'f310_DBD_mean',\n",
       " 'f310_delay_days_weighted_delay_max',\n",
       " 'f310_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f310_notdelay_DBD_mean',\n",
       " 'f310_notdelay_NUM_INSTALMENT_ratio_mean',\n",
       " 'f312_days_delayed_payment_diff_mean',\n",
       " 'f312_days_delayed_payment_pctchange_min',\n",
       " 'f312_days_weighted_delay_pctchange_max',\n",
       " 'f313_amt_ratio_pctchange_sum',\n",
       " 'f314_days_weighted_delay_diff_min',\n",
       " 'f314_days_weighted_delay_var',\n",
       " 'f405_AMT_INST_MIN_REGULARITY_diff_diff_mean',\n",
       " 'f405_CNT_DRAWINGS_CURRENT_var',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_max',\n",
       " 'f501_AMT_CREDIT_SUM_max',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_var',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f502_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_sum',\n",
       " 'f503_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f504_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_max',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_mean',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT_mean',\n",
       " 'f506_CREDIT_ACTIVE_Sold_mean',\n",
       " 'f507_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_max',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_mean',\n",
       " 'f507_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_sum',\n",
       " 'f507_Active_AMT_CREDIT_SUM_sum',\n",
       " 'f507_Active_DAYS_CREDIT-m-app_DAYS_BIRTH_sum',\n",
       " 'f507_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_sum',\n",
       " 'f507_Active_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_min',\n",
       " 'f508_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_sum',\n",
       " 'f508_Active_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_mean',\n",
       " 'f508_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE_max',\n",
       " 'f509_AMT_CREDIT_MAX_OVERDUE',\n",
       " 'f509_AMT_CREDIT_SUM',\n",
       " 'f509_AMT_CREDIT_SUM-d-app_AMT_ANNUITY',\n",
       " 'f509_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff',\n",
       " 'f509_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT',\n",
       " 'f509_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_diff',\n",
       " 'f509_CREDIT_TYPE',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_EMPLOYED',\n",
       " 'f509_DAYS_CREDIT_ENDDATE-m-app_DAYS_ID_PUBLISH',\n",
       " 'f509_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f509_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT',\n",
       " 'f509_DAYS_CREDIT_UPDATE_pctchange',\n",
       " 'f509_DAYS_ENDDATE_FACT-m-app_DAYS_REGISTRATION',\n",
       " 'f509_SK_ID_BUREAU',\n",
       " 'f510_AMT_CREDIT_SUM',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_ANNUITY',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_CREDIT',\n",
       " 'f510_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE',\n",
       " 'f510_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT',\n",
       " 'f510_CREDIT_TYPE',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f510_DAYS_CREDIT_UPDATE-m-DAYS_ENDDATE_FACT',\n",
       " 'f510_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_ENDDATE',\n",
       " 'f510_SK_ID_BUREAU',\n",
       " 'f601_Closed_STATUS_X_var',\n",
       " 'f701_all_credit-prevact_min',\n",
       " 'f701_all_credit_std-dby-income']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lb804.columns.difference(df_lb800.columns).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:04:46.072019Z",
     "start_time": "2018-07-26T06:04:46.068727Z"
    }
   },
   "outputs": [],
   "source": [
    "only804 = df_lb804.columns.difference(df_lb800.columns).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:04:53.250215Z",
     "start_time": "2018-07-26T06:04:53.246921Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "142"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(only804)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-26T06:05:41.798572Z",
     "start_time": "2018-07-26T06:05:41.794916Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'f001_building_score_mode_sum',\n",
       " 'f101_approved_AMT_ANNUITY-m-app_AMT_ANNUITY_max',\n",
       " 'f101_completed_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_completed_AMT_APPLICATION-d-app_AMT_CREDIT_max',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_max',\n",
       " 'f101_nyg-high_AMT_DOWN_PAYMENT_max',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f103_nyg-low_normal_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f105_approved_ratio',\n",
       " 'f105_future_payment_21m_min',\n",
       " 'f109_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL',\n",
       " 'f301_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f301_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f301_DAYS_ENTRY_PAYMENT_max',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-AMT_ANNUITY_mean',\n",
       " 'f302_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f302_days_weighted_delay_sum',\n",
       " 'f302_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f302_notdelay_NUM_INSTALMENT_ratio_max',\n",
       " 'f302_notdelay_days_weighted_delay_sum',\n",
       " 'f303_DPD_max',\n",
       " 'f305_con_AMT_PAYMENT_min',\n",
       " 'f305_delay_cas_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f308_cas_days_delayed_payment_diff_mean',\n",
       " 'f308_delay_cons_days_weighted_delay_tsw3_diff_max',\n",
       " 'f310_DBD_mean',\n",
       " 'f310_delay_days_weighted_delay_max',\n",
       " 'f310_notdelay_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f310_notdelay_NUM_INSTALMENT_ratio_mean',\n",
       " 'f405_CNT_DRAWINGS_CURRENT_var',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_max',\n",
       " 'f501_AMT_CREDIT_SUM_max',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_mean',\n",
       " 'f503_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_max'}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df_lb802.columns) & set(only804)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Only in LB .800 relative to .802 (these might cause overfitting)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T17:36:49.118451Z",
     "start_time": "2018-07-25T17:36:49.108809Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['f001_AMT_ANNUITY-d-CNT_CHILDREN',\n",
       " 'f001_AMT_CREDIT-d-cnt_adults',\n",
       " 'f001_AMT_GOODS_PRICE-d-cnt_adults',\n",
       " 'f001_AMT_INCOME_TOTAL-d-CNT_FAM_MEMBERS',\n",
       " 'f001_AMT_REQ_CREDIT_BUREAU_QRT',\n",
       " 'f001_APARTMENTS_AVG',\n",
       " 'f001_DAYS_EMPLOYED-d-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_BIRTH-d-DAYS_REGISTRATION-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-d-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED-d-DAYS_ID_PUBLISH-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_ID_PUBLISH',\n",
       " 'f001_ENTRANCES_AVG',\n",
       " 'f001_LIVINGAREA_MODE',\n",
       " 'f001_NEW_CAR_TO_EMPLOY_RATIO',\n",
       " 'f001_building_score_medi_mean',\n",
       " 'f001_building_score_mode_std',\n",
       " 'f002_WEEKDAY_APPR_PROCESS_START',\n",
       " 'f101_active_AMT_ANNUITY-d-app_AMT_ANNUITY_max',\n",
       " 'f101_active_amt_unpaid_var',\n",
       " 'f101_approved_AMT_CREDIT-d-total_debt_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f101_approved_DAYS_DECISION_max',\n",
       " 'f101_approved_DAYS_FIRST_DUE-d-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f101_approved_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_BIRTH_min',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_EMPLOYED_min',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f101_approved_HOUR_APPR_PROCESS_START_mean',\n",
       " 'f101_approved_cnt_paid_var',\n",
       " 'f101_completed_AMT_CREDIT-d-total_debt_mean',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_mean',\n",
       " 'f101_completed_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_RATE_DOWN_PAYMENT_max',\n",
       " 'f101_completed_WEEKDAY_APPR_PROCESS_START_FRIDAY_mean',\n",
       " 'f101_nyg-high_AMT_CREDIT-d-total_debt_min',\n",
       " 'f101_nyg-high_DAYS_DECISION_mean',\n",
       " 'f101_nyg-high_HOUR_APPR_PROCESS_START_max',\n",
       " 'f101_nyg-high_RATE_DOWN_PAYMENT_max',\n",
       " 'f101_nyg-high_RATE_DOWN_PAYMENT_var',\n",
       " 'f101_nyg-low_action_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f101_nyg-low_normal_DAYS_DECISION_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f101_nyg-middle_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_nyg-middle_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL_min',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f101_nyg-middle_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f101_nyg-middle_PRODUCT_COMBINATION_POS-industry-with-interest_mean',\n",
       " 'f102_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f102_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f102_approved_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_max',\n",
       " 'f102_approved_DAYS_FIRST_DUE-m-app_DAYS_REGISTRATION_max',\n",
       " 'f102_nyg-middle_DAYS_DECISION_max',\n",
       " 'f102_refused_DAYS_DECISION_var',\n",
       " 'f103_active_DAYS_LAST_DUE_1ST_VERSION-m-app_DAYS_EMPLOYED_mean',\n",
       " 'f103_approved_AMT_CREDIT-d-total_debt_var',\n",
       " 'f103_approved_DAYS_FIRST_DUE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f103_approved_DAYS_LAST_DUE-d-app_DAYS_BIRTH_mean',\n",
       " 'f103_approved_HOUR_APPR_PROCESS_START_var',\n",
       " 'f103_nyg-low_action_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f104_approved_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f104_completed_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL_max',\n",
       " 'f105_DAYS_DECISION_max',\n",
       " 'f105_past_payment_15m_max',\n",
       " 'f105_total_debt_sum-p-app',\n",
       " 'f106_NAME_PAYMENT_TYPE-Cash-through-the-bank_appref',\n",
       " 'f106_NAME_PAYMENT_TYPE-XNA_appref',\n",
       " 'f106_NAME_PORTFOLIO-XNA',\n",
       " 'f106_NAME_SELLER_INDUSTRY-Connectivity_app',\n",
       " 'f106_NAME_SELLER_INDUSTRY-Connectivity_appref',\n",
       " 'f108_AMT_ANNUITY',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_GOODS_PRICE',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL',\n",
       " 'f108_AMT_CREDIT-m-app_AMT_INCOME_TOTAL_pctchange',\n",
       " 'f108_AMT_GOODS_PRICE-d-AMT_CREDIT',\n",
       " 'f108_AMT_GOODS_PRICE-d-total_debt',\n",
       " 'f108_DAYS_FIRST_DUE-d-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f108_DAYS_LAST_DUE_1ST_VERSION-m-app_DAYS_REGISTRATION',\n",
       " 'f108_NAME_TYPE_SUITE',\n",
       " 'f108_past_payment_11m',\n",
       " 'f109_AMT_ANNUITY-m-app_AMT_ANNUITY-d-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_DOWN_PAYMENT',\n",
       " 'f109_DAYS_FIRST_DUE',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_EMPLOYED',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_ID_PUBLISH',\n",
       " 'f109_DAYS_FIRST_DUE-m-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f109_RATE_DOWN_PAYMENT',\n",
       " 'f109_days_trm-m-fdue',\n",
       " 'f109_past_payment_10m',\n",
       " 'f201_CNT_INSTALMENT-m-CNT_INSTALMENT_FUTURE_var',\n",
       " 'f201_CNT_INSTALMENT_FUTURE-d-CNT_INSTALMENT_var',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_mean',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_var',\n",
       " 'f201_NAME_CONTRACT_STATUS_Active_sum',\n",
       " 'f203_CNT_INSTALMENT_FUTURE-d-CNT_INSTALMENT_mean',\n",
       " 'f301_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f301_AMT_PAYMENT_min',\n",
       " 'f301_NUM_INSTALMENT_ratio_mean',\n",
       " 'f301_delay_DAYS_ENTRY_PAYMENT-m-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f301_delay_DPD_mean',\n",
       " 'f301_delay_NUM_INSTALMENT_ratio_min',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f301_notdelay_amt_ratio_mean',\n",
       " 'f301_notdelay_days_weighted_delay_mean',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_GOODS_PRICE_max',\n",
       " 'f303_AMT_PAYMENT-d-AMT_ANNUITY_min',\n",
       " 'f303_DAYS_ENTRY_PAYMENT-m-app_DAYS_EMPLOYED_max',\n",
       " 'f303_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f303_DBD_sum',\n",
       " 'f303_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_days_weighted_delay_mean',\n",
       " 'f303_days_weighted_delay_min',\n",
       " 'f303_notdelay_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_notdelay_days_weighted_delay_sum',\n",
       " 'f304_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var',\n",
       " 'f305_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f305_con_DBD_mean',\n",
       " 'f305_con_NUM_INSTALMENT_ratio_var',\n",
       " 'f305_con_days_weighted_delay_min',\n",
       " 'f305_delay_con_DAYS_ENTRY_PAYMENT-m-app_DAYS_REGISTRATION_max',\n",
       " 'f305_delay_con_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f305_delay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f305_notdelay_cas_AMT_PAYMENT-d-app_AMT_CREDIT_var',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-AMT_ANNUITY_var',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f305_notdelay_con_DBD_mean',\n",
       " 'f305_notdelay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f305_notdelay_con_NUM_INSTALMENT_ratio_var',\n",
       " 'f305_notdelay_con_days_weighted_delay_mean',\n",
       " 'f306_cas_DEP_diff_mean',\n",
       " 'f306_cas_DEP_diff_var',\n",
       " 'f306_cons_DEP_diff_min',\n",
       " 'f306_cons_DEP_diff_var',\n",
       " 'f307_cons_DEP_diff_min',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_mean',\n",
       " 'f308_cas_days_weighted_delay_diff_mean',\n",
       " 'f308_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f308_cons_AMT_INSTALMENT_pctchange_mean',\n",
       " 'f308_cons_AMT_INSTALMENT_pctchange_var',\n",
       " 'f308_cons_DAYS_INSTALMENT_pctchange_var',\n",
       " 'f308_cons_DBD_diff_mean',\n",
       " 'f308_cons_DBD_pctchange_var',\n",
       " 'f308_cons_NUM_INSTALMENT_NUMBER_pctchange_mean',\n",
       " 'f308_cons_NUM_INSTALMENT_ratio_diff_mean',\n",
       " 'f308_cons_days_delayed_payment_diff_mean',\n",
       " 'f308_cons_days_delayed_payment_pctchange_var',\n",
       " 'f308_cons_days_weighted_delay_tsw3_diff_max',\n",
       " 'f308_delay_cas_days_weighted_delay_tsw3_diff_mean',\n",
       " 'f308_delay_cons_AMT_PAYMENT_diff_min',\n",
       " 'f308_notdelay_rev_AMT_PAYMENT-d-AMT_ANNUITY_pctchange_var',\n",
       " 'f308_notdelay_rev_NUM_INSTALMENT_NUMBER_diff_mean',\n",
       " 'f309_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f309_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f310_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var',\n",
       " 'f310_notdelay_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f310_notdelay_DAYS_ENTRY_PAYMENT_sum',\n",
       " 'f311_AMT_PAYMENT-m-AMT_ANNUITY_pctchange_diff_min',\n",
       " 'f311_AMT_PAYMENT-m-AMT_ANNUITY_pctchange_min',\n",
       " 'f312_days_delayed_payment_diff_var',\n",
       " 'f312_days_delayed_payment_pctchange_mean',\n",
       " 'f312_days_delayed_payment_pctchange_var',\n",
       " 'f312_days_weighted_delay_diff_mean',\n",
       " 'f312_days_weighted_delay_diff_sum',\n",
       " 'f312_days_weighted_delay_max',\n",
       " 'f312_days_weighted_delay_sum',\n",
       " 'f313_days_delayed_payment_diff_mean',\n",
       " 'f313_days_delayed_payment_pctchange_mean',\n",
       " 'f313_days_delayed_payment_pctchange_min',\n",
       " 'f313_days_delayed_payment_pctchange_var',\n",
       " 'f313_days_weighted_delay_diff_mean',\n",
       " 'f313_days_weighted_delay_diff_sum',\n",
       " 'f313_days_weighted_delay_pctchange_mean',\n",
       " 'f313_days_weighted_delay_pctchange_min',\n",
       " 'f313_days_weighted_delay_pctchange_var',\n",
       " 'f314_AMT_PAYMENT-d-AMT_ANNUITY_mean',\n",
       " 'f314_days_delayed_payment_diff_max',\n",
       " 'f314_days_delayed_payment_pctchange_mean',\n",
       " 'f314_days_weighted_delay_diff_mean',\n",
       " 'f314_days_weighted_delay_diff_sum',\n",
       " 'f314_days_weighted_delay_pctchange_mean',\n",
       " 'f314_days_weighted_delay_pctchange_min',\n",
       " 'f314_days_weighted_delay_sum',\n",
       " 'f401_AMT_BALANCE-d-AMT_DRAWINGS_CURRENT_diff_max',\n",
       " 'f402_AMT_DRAWINGS_ATM_CURRENT_diff_pctchange_var',\n",
       " 'f406_AMT_RECEIVABLE_PRINCIPAL_pctchange_pctchange_min',\n",
       " 'f407_AMT_DRAWINGS_OTHER_CURRENT_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_CREDIT_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_var',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_mean',\n",
       " 'f501_CREDIT_TYPE_Consumer-credit_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_EMPLOYED_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_EMPLOYED_var',\n",
       " 'f501_DAYS_CREDIT-m-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_max',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_ID_PUBLISH_max',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_var',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_var',\n",
       " 'f501_DAYS_CREDIT_UPDATE_max',\n",
       " 'f501_DAYS_CREDIT_UPDATE_pctchange_max',\n",
       " 'f501_DAYS_CREDIT_UPDATE_var',\n",
       " 'f501_DAYS_CREDIT_min',\n",
       " 'f501_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_ENDDATE_max',\n",
       " 'f501_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_min',\n",
       " 'f502_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f502_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_sum',\n",
       " 'f503_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_diff_mean',\n",
       " 'f503_AMT_CREDIT_SUM_var',\n",
       " 'f503_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f503_DAYS_CREDIT_max',\n",
       " 'f503_DAYS_ENDDATE_FACT-d-app_DAYS_BIRTH_min',\n",
       " 'f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_min',\n",
       " 'f504_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_mean',\n",
       " 'f504_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f504_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_ENDDATE_max',\n",
       " 'f504_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_max',\n",
       " 'f504_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_min',\n",
       " 'f504_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_ENDDATE_mean',\n",
       " 'f504_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_ENDDATE_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_LIMIT-d-app_AMT_CREDIT_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM_diff_mean',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_max',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_max',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_max',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_var',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_sum',\n",
       " 'f505_Closed_AMT_CREDIT_SUM_min',\n",
       " 'f505_Closed_DAYS_CREDIT-d-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Closed_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_var',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATION_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f505_Closed_DAYS_CREDIT_UPDATE_pctchange_max',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_sum',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-m-app_DAYS_BIRTH_sum',\n",
       " 'f506_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_mean',\n",
       " 'f506_DAYS_CREDIT_ENDDATE-d-app_DAYS_LAST_PHONE_CHANGE_max',\n",
       " 'f601_Closed_MONTHS_BALANCE_sum',\n",
       " 'f601_Closed_STATUS_1_var',\n",
       " 'f602_Active_CURR-BUREAU_cnt_var',\n",
       " 'f602_Active_MONTHS_BALANCE_mean',\n",
       " 'f701_all_credit-prevact_min-dby-income',\n",
       " 'f701_all_credit_min',\n",
       " 'f750_y_pred']"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lb800.columns.difference(df_lb802.columns).tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:04:46.919593Z",
     "start_time": "2018-07-25T18:04:46.908032Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'f001_AMT_CREDIT-d-cnt_adults',\n",
       " 'f001_AMT_GOODS_PRICE-d-cnt_adults',\n",
       " 'f001_DAYS_EMPLOYED-d-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_ID_PUBLISH-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_EMPLOYED-m-DAYS_BIRTH-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_BIRTH',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_EMPLOYED',\n",
       " 'f001_DAYS_ID_PUBLISH-d-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-d-DAYS_ID_PUBLISH',\n",
       " 'f001_DAYS_LAST_PHONE_CHANGE-m-DAYS_EMPLOYED-d-DAYS_LAST_PHONE_CHANGE-m-DAYS_REGISTRATION',\n",
       " 'f001_DAYS_REGISTRATION-m-DAYS_EMPLOYED',\n",
       " 'f001_ENTRANCES_AVG',\n",
       " 'f001_LIVINGAREA_MODE',\n",
       " 'f101_active_AMT_ANNUITY-d-app_AMT_ANNUITY_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_max',\n",
       " 'f101_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_mean',\n",
       " 'f101_approved_DAYS_DECISION_max',\n",
       " 'f101_approved_DAYS_TERMINATION-d-app_DAYS_EMPLOYED_min',\n",
       " 'f101_approved_cnt_paid_var',\n",
       " 'f101_completed_AMT_GOODS_PRICE-d-total_debt_mean',\n",
       " 'f101_completed_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_DAYS_LAST_DUE_1ST_VERSION-d-app_DAYS_BIRTH_min',\n",
       " 'f101_completed_RATE_DOWN_PAYMENT_max',\n",
       " 'f101_nyg-high_DAYS_DECISION_mean',\n",
       " 'f101_nyg-high_HOUR_APPR_PROCESS_START_max',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_BIRTH_min',\n",
       " 'f101_nyg-low_normal_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f101_nyg-middle_AMT_ANNUITY-m-app_AMT_INCOME_TOTAL_max',\n",
       " 'f101_nyg-middle_AMT_APPLICATION-m-app_AMT_INCOME_TOTAL_min',\n",
       " 'f102_approved_AMT_GOODS_PRICE-d-AMT_CREDIT_min',\n",
       " 'f102_approved_DAYS_FIRST_DUE-d-app_DAYS_BIRTH_max',\n",
       " 'f103_approved_AMT_CREDIT-d-total_debt_var',\n",
       " 'f103_approved_DAYS_FIRST_DUE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f104_approved_DAYS_LAST_DUE-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f105_past_payment_15m_max',\n",
       " 'f105_total_debt_sum-p-app',\n",
       " 'f106_NAME_PAYMENT_TYPE-Cash-through-the-bank_appref',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f108_AMT_ANNUITY-m-app_AMT_GOODS_PRICE',\n",
       " 'f108_DAYS_FIRST_DUE-d-app_DAYS_LAST_PHONE_CHANGE',\n",
       " 'f109_AMT_CREDIT-d-app_AMT_INCOME_TOTAL',\n",
       " 'f109_AMT_DOWN_PAYMENT',\n",
       " 'f109_DAYS_FIRST_DUE-d-app_DAYS_EMPLOYED',\n",
       " 'f109_past_payment_10m',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_mean',\n",
       " 'f201_CNT_INSTALMENT_FUTURE_pctchange_var',\n",
       " 'f301_AMT_PAYMENT_min',\n",
       " 'f301_delay_DAYS_ENTRY_PAYMENT-m-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f301_delay_DPD_mean',\n",
       " 'f301_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f301_notdelay_amt_ratio_mean',\n",
       " 'f301_notdelay_days_weighted_delay_mean',\n",
       " 'f302_delay_AMT_PAYMENT-d-app_AMT_ANNUITY_min',\n",
       " 'f303_AMT_PAYMENT-d-AMT_ANNUITY_min',\n",
       " 'f303_DAYS_ENTRY_PAYMENT-m-app_DAYS_EMPLOYED_max',\n",
       " 'f303_DAYS_ENTRY_PAYMENT_mean',\n",
       " 'f303_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_days_weighted_delay_mean',\n",
       " 'f303_notdelay_NUM_INSTALMENT_ratio_var',\n",
       " 'f303_notdelay_days_weighted_delay_sum',\n",
       " 'f305_con_days_weighted_delay_min',\n",
       " 'f305_delay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f305_notdelay_con_AMT_PAYMENT-d-AMT_ANNUITY_var',\n",
       " 'f305_notdelay_con_DBD_mean',\n",
       " 'f305_notdelay_con_NUM_INSTALMENT_ratio_mean',\n",
       " 'f306_cas_DEP_diff_var',\n",
       " 'f306_cons_DEP_diff_min',\n",
       " 'f308_cas_NUM_INSTALMENT_NUMBER_pctchange_mean',\n",
       " 'f308_cas_days_weighted_delay_tsw3_diff_var',\n",
       " 'f308_cons_DBD_pctchange_var',\n",
       " 'f308_notdelay_rev_AMT_PAYMENT-d-AMT_ANNUITY_pctchange_var',\n",
       " 'f309_AMT_PAYMENT-d-app_AMT_ANNUITY_mean',\n",
       " 'f309_notdelay_AMT_PAYMENT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f310_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var',\n",
       " 'f312_days_delayed_payment_diff_var',\n",
       " 'f312_days_delayed_payment_pctchange_mean',\n",
       " 'f312_days_weighted_delay_sum',\n",
       " 'f313_days_delayed_payment_pctchange_min',\n",
       " 'f313_days_delayed_payment_pctchange_var',\n",
       " 'f313_days_weighted_delay_pctchange_mean',\n",
       " 'f313_days_weighted_delay_pctchange_min',\n",
       " 'f313_days_weighted_delay_pctchange_var',\n",
       " 'f314_days_delayed_payment_pctchange_mean',\n",
       " 'f402_AMT_DRAWINGS_ATM_CURRENT_diff_pctchange_var',\n",
       " 'f406_AMT_RECEIVABLE_PRINCIPAL_pctchange_pctchange_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_mean',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_CREDIT_max',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_min',\n",
       " 'f501_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_var',\n",
       " 'f501_AMT_CREDIT_SUM_DEBT-p-AMT_CREDIT_SUM_LIMIT_mean',\n",
       " 'f501_DAYS_CREDIT-d-app_DAYS_EMPLOYED_mean',\n",
       " 'f501_DAYS_CREDIT-m-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_LAST_PHONE_CHANGE_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f501_DAYS_CREDIT_UPDATE_diff_var',\n",
       " 'f501_DAYS_CREDIT_min',\n",
       " 'f501_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_mean',\n",
       " 'f501_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_min',\n",
       " 'f502_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f502_DAYS_CREDIT_UPDATE_diff_mean',\n",
       " 'f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_min',\n",
       " 'f503_AMT_CREDIT_SUM_var',\n",
       " 'f503_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_mean',\n",
       " 'f503_DAYS_CREDIT_max',\n",
       " 'f503_DAYS_ENDDATE_FACT-d-app_DAYS_BIRTH_min',\n",
       " 'f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_min',\n",
       " 'f504_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff_mean',\n",
       " 'f504_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var',\n",
       " 'f504_DAYS_CREDIT_UPDATE-m-DAYS_CREDIT_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_diff_var',\n",
       " 'f505_Active_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchange_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_diff_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max',\n",
       " 'f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctchange_mean',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT-d-app_AMT_INCOME_TOTAL_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_DEBT_min',\n",
       " 'f505_Active_AMT_CREDIT_SUM_diff_mean',\n",
       " 'f505_Active_DAYS_CREDIT-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT-m-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_max',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_sum',\n",
       " 'f505_Active_DAYS_CREDIT_ENDDATE-m-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_diff_min',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_pctchange_mean',\n",
       " 'f505_Active_DAYS_CREDIT_UPDATE_var',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-d-app_AMT_INCOME_TOTAL_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_mean',\n",
       " 'f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_sum',\n",
       " 'f505_Closed_AMT_CREDIT_SUM_min',\n",
       " 'f505_Closed_DAYS_CREDIT-d-app_DAYS_EMPLOYED_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_sum',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_PUBLISH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATION_var',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_BIRTH_min',\n",
       " 'f505_Closed_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATION_sum',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_var',\n",
       " 'f505_Closed_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_sum',\n",
       " 'f601_Closed_MONTHS_BALANCE_sum',\n",
       " 'f602_Active_MONTHS_BALANCE_mean',\n",
       " 'f701_all_credit-prevact_min-dby-income',\n",
       " 'f701_all_credit_min'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df_lb800.columns.difference(df_lb804.columns).tolist()) & set( df_lb800.columns.difference(df_lb802.columns).tolist() )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load the LB 800 and LB 804 feature sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:48:55.553707Z",
     "start_time": "2018-07-25T18:47:49.458281Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 700/700 [01:04<00:00, 10.78it/s]\n"
     ]
    }
   ],
   "source": [
    "files = ('../feature/train_' + df_lb800.columns + '.f').tolist()\n",
    "X_train_lb800 = pd.concat([\n",
    "                pd.read_feather(f) for f in tqdm(files, mininterval=60)\n",
    "               ], axis=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:24.574794Z",
     "start_time": "2018-07-25T18:48:55.555614Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 700/700 [00:28<00:00, 24.30it/s]\n"
     ]
    }
   ],
   "source": [
    "files = ('../feature/test_' + df_lb800.columns + '.f').tolist()\n",
    "X_test_lb800 = pd.concat([\n",
    "                pd.read_feather(f) for f in tqdm(files, mininterval=60)\n",
    "               ], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:40.935845Z",
     "start_time": "2018-07-25T18:49:24.576577Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 601/601 [00:15<00:00, 39.07it/s]\n"
     ]
    }
   ],
   "source": [
    "files = ('../feature/train_' + df_lb804.columns + '.f').tolist()\n",
    "X_train_lb804 = pd.concat([\n",
    "                pd.read_feather(f) for f in tqdm(files, mininterval=60)\n",
    "               ], axis=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:47.270782Z",
     "start_time": "2018-07-25T18:49:40.937518Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 601/601 [00:06<00:00, 96.20it/s]\n"
     ]
    }
   ],
   "source": [
    "files = ('../feature/test_' + df_lb804.columns + '.f').tolist()\n",
    "X_test_lb804 = pd.concat([\n",
    "                pd.read_feather(f) for f in tqdm(files, mininterval=60)\n",
    "               ], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:47.515422Z",
     "start_time": "2018-07-25T18:49:47.272374Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20/20 [00:00<00:00, 88.49it/s]\n"
     ]
    }
   ],
   "source": [
    "# Training labels: the TARGET attribute of whatever utils.read_pickles returns for '../data/label'.\n",
    "# NOTE(review): presumably a DataFrame row-aligned with the X_train_* frames -- confirm.\n",
    "y = utils.read_pickles('../data/label').TARGET"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cross-validation and feature importance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:47.522425Z",
     "start_time": "2018-07-25T18:49:47.516735Z"
    }
   },
   "outputs": [],
   "source": [
    "param = {\n",
    "         'objective': 'binary',\n",
    "         'metric': 'auc',\n",
    "         'learning_rate': 0.01,\n",
    "         'max_depth': 6,\n",
    "         'num_leaves': 63,\n",
    "         'max_bin': 255,\n",
    "         \n",
    "         'min_child_weight': 10,\n",
    "         'min_data_in_leaf': 150,\n",
    "         'reg_lambda': 0.5,  # L2 regularization term on weights.\n",
    "         'reg_alpha': 0.5,  # L1 regularization term on weights.\n",
    "         \n",
    "         'colsample_bytree': 0.9,\n",
    "         'subsample': 0.9,\n",
    "#         'nthread': 32,\n",
    "         'nthread': cpu_count(),\n",
    "         'bagging_freq': 1,\n",
    "         'verbose':-1,\n",
    "         'seed': 71\n",
    "         }\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:29:41.167539Z",
     "start_time": "2018-07-25T18:19:02.787935Z"
    },
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[50]\tcv_agg's auc: 0.769823 + 0.00338049\n",
      "[100]\tcv_agg's auc: 0.773407 + 0.00323751\n",
      "[150]\tcv_agg's auc: 0.776822 + 0.00322845\n",
      "[200]\tcv_agg's auc: 0.780142 + 0.00328529\n",
      "[250]\tcv_agg's auc: 0.783127 + 0.00331152\n",
      "[300]\tcv_agg's auc: 0.785781 + 0.00323746\n",
      "[350]\tcv_agg's auc: 0.787881 + 0.00322167\n",
      "[400]\tcv_agg's auc: 0.789557 + 0.00311069\n",
      "[450]\tcv_agg's auc: 0.79099 + 0.003146\n",
      "[500]\tcv_agg's auc: 0.792149 + 0.00310973\n",
      "[550]\tcv_agg's auc: 0.793187 + 0.0031717\n",
      "[600]\tcv_agg's auc: 0.794078 + 0.00321107\n",
      "[650]\tcv_agg's auc: 0.794784 + 0.00319545\n",
      "[700]\tcv_agg's auc: 0.795468 + 0.0032306\n",
      "[750]\tcv_agg's auc: 0.795997 + 0.00329536\n",
      "[800]\tcv_agg's auc: 0.796494 + 0.0032991\n",
      "[850]\tcv_agg's auc: 0.796986 + 0.00332342\n",
      "[900]\tcv_agg's auc: 0.797418 + 0.00334438\n",
      "[950]\tcv_agg's auc: 0.797817 + 0.00336117\n",
      "[1000]\tcv_agg's auc: 0.798161 + 0.00335759\n",
      "[1050]\tcv_agg's auc: 0.798496 + 0.00333883\n",
      "[1100]\tcv_agg's auc: 0.798796 + 0.0033446\n",
      "[1150]\tcv_agg's auc: 0.79912 + 0.00335258\n",
      "[1200]\tcv_agg's auc: 0.799391 + 0.00334464\n",
      "[1250]\tcv_agg's auc: 0.799657 + 0.00335042\n",
      "[1300]\tcv_agg's auc: 0.799906 + 0.0033484\n",
      "[1350]\tcv_agg's auc: 0.800175 + 0.00336527\n",
      "[1400]\tcv_agg's auc: 0.80041 + 0.00341142\n",
      "[1450]\tcv_agg's auc: 0.800643 + 0.00342879\n",
      "[1500]\tcv_agg's auc: 0.800855 + 0.00344007\n",
      "[1550]\tcv_agg's auc: 0.801078 + 0.00344681\n",
      "[1600]\tcv_agg's auc: 0.80125 + 0.00345783\n",
      "[1650]\tcv_agg's auc: 0.801415 + 0.00346196\n",
      "[1700]\tcv_agg's auc: 0.801582 + 0.0034756\n",
      "[1750]\tcv_agg's auc: 0.801732 + 0.00347662\n",
      "[1800]\tcv_agg's auc: 0.80189 + 0.00349906\n",
      "[1850]\tcv_agg's auc: 0.80202 + 0.00350073\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-24-989b3cfc829f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      5\u001b[0m ret = lgb.cv(param, dtrain, 9999, nfold=7,\n\u001b[1;32m      6\u001b[0m              \u001b[0mearly_stopping_rounds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose_eval\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m              seed=71)\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"CV auc-mean: {ret['auc-mean'][-1]} + {ret['auc-stdv'][-1]}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py\u001b[0m in \u001b[0;36mcv\u001b[0;34m(params, train_set, num_boost_round, folds, nfold, stratified, shuffle, metrics, fobj, feval, init_model, feature_name, categorical_feature, early_stopping_rounds, fpreproc, verbose_eval, show_stdv, seed, callbacks)\u001b[0m\n\u001b[1;32m    449\u001b[0m                                     \u001b[0mend_iteration\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum_boost_round\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    450\u001b[0m                                     evaluation_result_list=None))\n\u001b[0;32m--> 451\u001b[0;31m         \u001b[0mcvfolds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfobj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    452\u001b[0m         \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_agg_cv_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcvfolds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval_valid\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    453\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmean\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstd\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/lightgbm/engine.py\u001b[0m in \u001b[0;36mhandlerFunction\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    247\u001b[0m             \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    248\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mbooster\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mboosters\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 249\u001b[0;31m                 \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbooster\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    250\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    251\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mhandlerFunction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/lightgbm/basic.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(self, train_set, fobj)\u001b[0m\n\u001b[1;32m   1526\u001b[0m             _safe_call(_LIB.LGBM_BoosterUpdateOneIter(\n\u001b[1;32m   1527\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1528\u001b[0;31m                 ctypes.byref(is_finished)))\n\u001b[0m\u001b[1;32m   1529\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__is_predicted_cur_iter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__num_dataset\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1530\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mis_finished\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "CAT = list( set(X_train_lb800.columns)&set(utils_cat.ALL))\n",
    "dtrain = lgb.Dataset(X_train_lb800, y, categorical_feature=CAT )\n",
    "gc.collect()\n",
    "\n",
    "ret = lgb.cv(param, dtrain, 9999, nfold=7,\n",
    "             early_stopping_rounds=100, verbose_eval=50,\n",
    "             seed=71)\n",
    "\n",
    "result = f\"CV auc-mean: {ret['auc-mean'][-1]} + {ret['auc-stdv'][-1]}\"\n",
    "print(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:33:49.855265Z",
     "start_time": "2018-07-25T18:31:07.247734Z"
    }
   },
   "outputs": [],
   "source": [
    "CAT = list( set(X_train_lb800.columns)&set(utils_cat.ALL))\n",
    "dtrain = lgb.Dataset(X_train_lb800, y, categorical_feature=CAT )\n",
    "gc.collect()\n",
    "\n",
    "model = lgb.train(param, dtrain, 3000)\n",
    "imp_lb800 = ex.getImp(model).sort_values(['gain', 'feature'], ascending=[False, True])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:36:13.899355Z",
     "start_time": "2018-07-25T18:33:49.860551Z"
    }
   },
   "outputs": [],
   "source": [
    "CAT = list( set(X_train_lb804.columns)&set(utils_cat.ALL))\n",
    "dtrain = lgb.Dataset(X_train_lb804, y, categorical_feature=CAT )\n",
    "gc.collect()\n",
    "\n",
    "model = lgb.train(param, dtrain, 3000)\n",
    "imp_lb804 = ex.getImp(model).sort_values(['gain', 'feature'], ascending=[False, True])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:36:13.913539Z",
     "start_time": "2018-07-25T18:36:13.901512Z"
    }
   },
   "outputs": [],
   "source": [
    "imp_lb800.to_csv('lb800.csv', index=False)\n",
    "imp_lb804.to_csv('lb804.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:40:08.011872Z",
     "start_time": "2018-07-25T18:40:08.008268Z"
    }
   },
   "outputs": [],
   "source": [
    "imp_lb800_ovf = imp_lb800[imp_lb800.feature.isin(feature_ovf)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:40:31.142791Z",
     "start_time": "2018-07-25T18:40:31.138396Z"
    }
   },
   "outputs": [],
   "source": [
    "imp_lb800_ovf.to_csv('lb800_ovf.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adversarial validation (train vs. test) on the `feature_ovf` columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:49:59.054501Z",
     "start_time": "2018-07-25T18:49:55.408464Z"
    }
   },
   "outputs": [],
   "source": [
    "X_train_lb800_ovf = X_train_lb800[feature_ovf]\n",
    "X_train_lb800_ovf['target'] = 1\n",
    "\n",
    "X_test_lb800_ovf  = X_test_lb800[feature_ovf]\n",
    "X_test_lb800_ovf['target'] = 0\n",
    "\n",
    "X_ovf = pd.concat([X_train_lb800_ovf, X_test_lb800_ovf], ignore_index=True)\n",
    "y_ovf = X_ovf.target\n",
    "X_ovf.drop('target', axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2018-07-25T18:51:35.125Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[50]\tcv_agg's auc: 0.880407 + 0.00124874\n",
      "[100]\tcv_agg's auc: 0.8837 + 0.00121335\n",
      "[150]\tcv_agg's auc: 0.886695 + 0.00132782\n",
      "[200]\tcv_agg's auc: 0.889878 + 0.00120145\n",
      "[250]\tcv_agg's auc: 0.892953 + 0.00131629\n",
      "[300]\tcv_agg's auc: 0.89584 + 0.00130764\n",
      "[350]\tcv_agg's auc: 0.898574 + 0.00127852\n",
      "[400]\tcv_agg's auc: 0.90172 + 0.00122743\n",
      "[450]\tcv_agg's auc: 0.903881 + 0.00125811\n",
      "[500]\tcv_agg's auc: 0.905632 + 0.00122126\n",
      "[550]\tcv_agg's auc: 0.907056 + 0.00125124\n",
      "[600]\tcv_agg's auc: 0.908316 + 0.00124777\n",
      "[650]\tcv_agg's auc: 0.909413 + 0.00120961\n",
      "[700]\tcv_agg's auc: 0.910371 + 0.00122499\n",
      "[750]\tcv_agg's auc: 0.911307 + 0.00124072\n",
      "[800]\tcv_agg's auc: 0.912172 + 0.0012671\n",
      "[850]\tcv_agg's auc: 0.913 + 0.00123488\n",
      "[900]\tcv_agg's auc: 0.913781 + 0.00123768\n",
      "[950]\tcv_agg's auc: 0.914547 + 0.00121634\n",
      "[1000]\tcv_agg's auc: 0.91523 + 0.00123803\n",
      "[1050]\tcv_agg's auc: 0.915936 + 0.00125653\n",
      "[1100]\tcv_agg's auc: 0.916571 + 0.00124066\n",
      "[1150]\tcv_agg's auc: 0.917216 + 0.00124012\n",
      "[1200]\tcv_agg's auc: 0.917806 + 0.00127138\n",
      "[1250]\tcv_agg's auc: 0.91837 + 0.00126109\n",
      "[1300]\tcv_agg's auc: 0.918926 + 0.00124194\n",
      "[1350]\tcv_agg's auc: 0.919468 + 0.00122714\n",
      "[1400]\tcv_agg's auc: 0.920005 + 0.00119656\n",
      "[1450]\tcv_agg's auc: 0.920476 + 0.00120546\n",
      "[1500]\tcv_agg's auc: 0.920992 + 0.00120151\n",
      "[1550]\tcv_agg's auc: 0.921479 + 0.00123208\n",
      "[1600]\tcv_agg's auc: 0.921944 + 0.00121486\n",
      "[1650]\tcv_agg's auc: 0.922365 + 0.00119098\n",
      "[1700]\tcv_agg's auc: 0.922771 + 0.00120504\n",
      "[1750]\tcv_agg's auc: 0.923185 + 0.00119884\n",
      "[1800]\tcv_agg's auc: 0.923593 + 0.00119904\n",
      "[1850]\tcv_agg's auc: 0.92396 + 0.00121068\n",
      "[1900]\tcv_agg's auc: 0.924329 + 0.0012224\n",
      "[1950]\tcv_agg's auc: 0.924637 + 0.00122391\n",
      "[2000]\tcv_agg's auc: 0.924972 + 0.00125291\n",
      "[2050]\tcv_agg's auc: 0.925299 + 0.00122794\n",
      "[2100]\tcv_agg's auc: 0.925638 + 0.00124629\n",
      "[2150]\tcv_agg's auc: 0.92598 + 0.00124231\n",
      "[2200]\tcv_agg's auc: 0.92628 + 0.00122686\n",
      "[2250]\tcv_agg's auc: 0.92658 + 0.00122826\n",
      "[2300]\tcv_agg's auc: 0.92684 + 0.00123777\n",
      "[2350]\tcv_agg's auc: 0.927139 + 0.00124605\n",
      "[2400]\tcv_agg's auc: 0.927428 + 0.00126492\n",
      "[2450]\tcv_agg's auc: 0.927658 + 0.00125975\n",
      "[2500]\tcv_agg's auc: 0.927919 + 0.0012707\n",
      "[2550]\tcv_agg's auc: 0.928163 + 0.00125537\n",
      "[2600]\tcv_agg's auc: 0.928409 + 0.00123403\n",
      "[2650]\tcv_agg's auc: 0.928617 + 0.0012295\n",
      "[2700]\tcv_agg's auc: 0.928844 + 0.00125151\n",
      "[2750]\tcv_agg's auc: 0.929071 + 0.00125157\n",
      "[2800]\tcv_agg's auc: 0.929284 + 0.0012693\n",
      "[2850]\tcv_agg's auc: 0.929504 + 0.00127603\n",
      "[2900]\tcv_agg's auc: 0.929697 + 0.00127497\n",
      "[2950]\tcv_agg's auc: 0.929921 + 0.00127659\n",
      "[3000]\tcv_agg's auc: 0.930115 + 0.00126734\n",
      "[3050]\tcv_agg's auc: 0.930317 + 0.00129216\n",
      "[3100]\tcv_agg's auc: 0.930482 + 0.00131936\n",
      "[3150]\tcv_agg's auc: 0.930661 + 0.00130699\n",
      "[3200]\tcv_agg's auc: 0.930834 + 0.00132509\n",
      "[3250]\tcv_agg's auc: 0.931013 + 0.00132526\n",
      "[3300]\tcv_agg's auc: 0.931157 + 0.00131065\n",
      "[3350]\tcv_agg's auc: 0.931323 + 0.00130913\n",
      "[3400]\tcv_agg's auc: 0.931471 + 0.00131302\n",
      "[3450]\tcv_agg's auc: 0.931611 + 0.00131094\n",
      "[3500]\tcv_agg's auc: 0.931772 + 0.00129497\n",
      "[3550]\tcv_agg's auc: 0.931913 + 0.00129697\n",
      "[3600]\tcv_agg's auc: 0.932067 + 0.00131051\n",
      "[3650]\tcv_agg's auc: 0.932201 + 0.00130324\n",
      "[3700]\tcv_agg's auc: 0.932341 + 0.0012981\n",
      "[3750]\tcv_agg's auc: 0.93248 + 0.00129713\n",
      "[3800]\tcv_agg's auc: 0.932611 + 0.00128272\n",
      "[3850]\tcv_agg's auc: 0.932728 + 0.00128035\n",
      "[3900]\tcv_agg's auc: 0.932862 + 0.00128614\n",
      "[3950]\tcv_agg's auc: 0.932981 + 0.00129972\n",
      "[4000]\tcv_agg's auc: 0.933095 + 0.00131037\n",
      "[4050]\tcv_agg's auc: 0.933218 + 0.0013237\n",
      "[4100]\tcv_agg's auc: 0.93335 + 0.00130977\n",
      "[4150]\tcv_agg's auc: 0.933475 + 0.00129843\n",
      "[4200]\tcv_agg's auc: 0.93357 + 0.00128706\n",
      "[4250]\tcv_agg's auc: 0.933697 + 0.00128072\n",
      "[4300]\tcv_agg's auc: 0.933809 + 0.00128442\n",
      "[4350]\tcv_agg's auc: 0.933917 + 0.00127146\n",
      "[4400]\tcv_agg's auc: 0.934014 + 0.0012707\n",
      "[4450]\tcv_agg's auc: 0.934121 + 0.00127364\n",
      "[4500]\tcv_agg's auc: 0.934227 + 0.00127177\n",
      "[4550]\tcv_agg's auc: 0.934326 + 0.00127593\n",
      "[4600]\tcv_agg's auc: 0.934421 + 0.00127463\n",
      "[4650]\tcv_agg's auc: 0.934518 + 0.00127412\n",
      "[4700]\tcv_agg's auc: 0.934627 + 0.00125998\n",
      "[4750]\tcv_agg's auc: 0.934727 + 0.00125489\n",
      "[4800]\tcv_agg's auc: 0.934837 + 0.00125404\n",
      "[4850]\tcv_agg's auc: 0.934922 + 0.00124606\n",
      "[4900]\tcv_agg's auc: 0.935014 + 0.00124401\n",
      "[4950]\tcv_agg's auc: 0.935096 + 0.00123829\n",
      "[5000]\tcv_agg's auc: 0.935182 + 0.00125121\n",
      "[5050]\tcv_agg's auc: 0.935276 + 0.00123747\n",
      "[5100]\tcv_agg's auc: 0.935357 + 0.00123107\n",
      "[5150]\tcv_agg's auc: 0.935428 + 0.0012163\n",
      "[5200]\tcv_agg's auc: 0.935501 + 0.00122377\n",
      "[5250]\tcv_agg's auc: 0.93558 + 0.00122128\n",
      "[5300]\tcv_agg's auc: 0.935651 + 0.00121241\n",
      "[5350]\tcv_agg's auc: 0.935728 + 0.00120128\n",
      "[5400]\tcv_agg's auc: 0.935812 + 0.00120407\n",
      "[5450]\tcv_agg's auc: 0.935895 + 0.00120666\n",
      "[5500]\tcv_agg's auc: 0.935974 + 0.00121609\n",
      "[5550]\tcv_agg's auc: 0.936055 + 0.00121069\n",
      "[5600]\tcv_agg's auc: 0.936137 + 0.00120782\n",
      "[5650]\tcv_agg's auc: 0.936217 + 0.00122228\n",
      "[5700]\tcv_agg's auc: 0.936288 + 0.00122427\n",
      "[5750]\tcv_agg's auc: 0.936352 + 0.0012364\n",
      "[5800]\tcv_agg's auc: 0.936426 + 0.00124232\n",
      "[5850]\tcv_agg's auc: 0.936502 + 0.00123927\n",
      "[5900]\tcv_agg's auc: 0.936567 + 0.00124625\n",
      "[5950]\tcv_agg's auc: 0.936629 + 0.00124872\n",
      "[6000]\tcv_agg's auc: 0.936699 + 0.00124461\n",
      "[6050]\tcv_agg's auc: 0.936766 + 0.00125162\n",
      "[6100]\tcv_agg's auc: 0.936825 + 0.00125177\n",
      "[6150]\tcv_agg's auc: 0.936881 + 0.00125549\n",
      "[6200]\tcv_agg's auc: 0.936952 + 0.00125303\n",
      "[6250]\tcv_agg's auc: 0.937012 + 0.0012561\n",
      "[6300]\tcv_agg's auc: 0.937069 + 0.00125209\n"
     ]
    }
   ],
   "source": [
    "CAT = list( set(X_ovf.columns)&set(utils_cat.ALL))\n",
    "dtrain = lgb.Dataset(X_ovf, y_ovf, categorical_feature=CAT )\n",
    "gc.collect()\n",
    "\n",
    "ret = lgb.cv(param, dtrain, 9999, nfold=7,\n",
    "             early_stopping_rounds=100, verbose_eval=50,\n",
    "             seed=71)\n",
    "\n",
    "result = f\"CV auc-mean: {ret['auc-mean'][-1]} + {ret['auc-stdv'][-1]}\"\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:50:26.945974Z",
     "start_time": "2018-07-25T18:49:59.056018Z"
    }
   },
   "outputs": [],
   "source": [
    "CAT = list( set(X_ovf.columns)&set(utils_cat.ALL))\n",
    "dtrain = lgb.Dataset(X_ovf, y_ovf, categorical_feature=CAT )\n",
    "gc.collect()\n",
    "\n",
    "model = lgb.train(param, dtrain, 1000)\n",
    "imp_ovf = ex.getImp(model).sort_values(['gain', 'feature'], ascending=[False, True])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-07-25T18:50:34.330950Z",
     "start_time": "2018-07-25T18:50:34.308163Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>split</th>\n",
       "      <th>gain</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>f601_Closed_MONTHS_BALANCE_sum</td>\n",
       "      <td>3117</td>\n",
       "      <td>2.197206e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>f602_Active_MONTHS_BALANCE_mean</td>\n",
       "      <td>2134</td>\n",
       "      <td>6.617591e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>f505_Closed_AMT_CREDIT_SUM-d-app_AMT_INCOME_TO...</td>\n",
       "      <td>227</td>\n",
       "      <td>5.399114e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>f505_Active_DAYS_CREDIT-d-app_DAYS_BIRTH_min</td>\n",
       "      <td>307</td>\n",
       "      <td>1.641696e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>f505_Active_DAYS_CREDIT_max</td>\n",
       "      <td>872</td>\n",
       "      <td>1.605563e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>f105_prevapp_future_payment_21m</td>\n",
       "      <td>915</td>\n",
       "      <td>1.570887e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>f505_Closed_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_sum</td>\n",
       "      <td>1196</td>\n",
       "      <td>1.431801e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>f501_DAYS_CREDIT_min</td>\n",
       "      <td>1387</td>\n",
       "      <td>1.410532e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>f105_prevapp_future_payment_20m</td>\n",
       "      <td>597</td>\n",
       "      <td>8.742117e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>f505_Active_DAYS_CREDIT_UPDATE_var</td>\n",
       "      <td>876</td>\n",
       "      <td>6.358519e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...</td>\n",
       "      <td>83</td>\n",
       "      <td>5.216048e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>f501_DAYS_CREDIT-d-app_DAYS_BIRTH_mean</td>\n",
       "      <td>427</td>\n",
       "      <td>5.153050e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>f310_notdelay_DAYS_ENTRY_PAYMENT-m-app_DAYS_BI...</td>\n",
       "      <td>636</td>\n",
       "      <td>4.570043e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_mean</td>\n",
       "      <td>79</td>\n",
       "      <td>4.525381e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>f310_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var</td>\n",
       "      <td>768</td>\n",
       "      <td>4.176784e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>f101_nyg-high_DAYS_DECISION_mean</td>\n",
       "      <td>732</td>\n",
       "      <td>3.128695e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>f105_prevapp_future_payment_12m</td>\n",
       "      <td>651</td>\n",
       "      <td>3.097413e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>f001_AMT_CREDIT-d-cnt_adults</td>\n",
       "      <td>678</td>\n",
       "      <td>3.039603e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>f501_DAYS_CREDIT_UPDATE_diff_mean</td>\n",
       "      <td>619</td>\n",
       "      <td>2.927384e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>f501_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_mean</td>\n",
       "      <td>768</td>\n",
       "      <td>2.771257e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>f001_AMT_GOODS_PRICE-d-cnt_adults</td>\n",
       "      <td>596</td>\n",
       "      <td>2.644722e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>f313_days_delayed_payment_pctchange_min</td>\n",
       "      <td>181</td>\n",
       "      <td>2.611260e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>f305_con_AMT_PAYMENT-d-app_AMT_CREDIT_mean</td>\n",
       "      <td>239</td>\n",
       "      <td>2.536922e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>f303_AMT_PAYMENT-d-AMT_ANNUITY_min</td>\n",
       "      <td>464</td>\n",
       "      <td>2.301015e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>f501_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_...</td>\n",
       "      <td>425</td>\n",
       "      <td>2.184166e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>f501_DAYS_CREDIT_UPDATE_pctchange_var</td>\n",
       "      <td>444</td>\n",
       "      <td>2.109488e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>f305_cas_DAYS_ENTRY_PAYMENT_mean</td>\n",
       "      <td>494</td>\n",
       "      <td>1.870025e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>f312_days_delayed_payment_diff_var</td>\n",
       "      <td>199</td>\n",
       "      <td>1.865849e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>f101_completed_AMT_APPLICATION-d-app_AMT_GOODS...</td>\n",
       "      <td>318</td>\n",
       "      <td>1.794695e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_...</td>\n",
       "      <td>166</td>\n",
       "      <td>1.664402e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>f501_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_diff...</td>\n",
       "      <td>52</td>\n",
       "      <td>6.507261e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>f502_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_mean</td>\n",
       "      <td>41</td>\n",
       "      <td>6.463721e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>213</th>\n",
       "      <td>f001_DAYS_ID_PUBLISH-d-DAYS_REGISTRATION</td>\n",
       "      <td>56</td>\n",
       "      <td>6.437585e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>f501_DAYS_CREDIT-m-app_DAYS_EMPLOYED_min</td>\n",
       "      <td>48</td>\n",
       "      <td>6.007222e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>215</th>\n",
       "      <td>f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min</td>\n",
       "      <td>43</td>\n",
       "      <td>5.831270e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>216</th>\n",
       "      <td>f305_con_days_weighted_delay_min</td>\n",
       "      <td>47</td>\n",
       "      <td>5.675828e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>217</th>\n",
       "      <td>f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATI...</td>\n",
       "      <td>49</td>\n",
       "      <td>5.632591e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>218</th>\n",
       "      <td>f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchang...</td>\n",
       "      <td>48</td>\n",
       "      <td>5.588450e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>219</th>\n",
       "      <td>f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATI...</td>\n",
       "      <td>44</td>\n",
       "      <td>5.502528e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220</th>\n",
       "      <td>f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max</td>\n",
       "      <td>40</td>\n",
       "      <td>5.454684e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221</th>\n",
       "      <td>f505_Active_DAYS_CREDIT-m-app_DAYS_REGISTRATIO...</td>\n",
       "      <td>50</td>\n",
       "      <td>5.420234e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222</th>\n",
       "      <td>f001_LIVINGAREA_AVG</td>\n",
       "      <td>40</td>\n",
       "      <td>5.409238e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223</th>\n",
       "      <td>f101_nyg-high_HOUR_APPR_PROCESS_START_max</td>\n",
       "      <td>30</td>\n",
       "      <td>5.306953e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>f503_DAYS_CREDIT-d-app_DAYS_BIRTH_min</td>\n",
       "      <td>40</td>\n",
       "      <td>5.260760e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225</th>\n",
       "      <td>f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...</td>\n",
       "      <td>42</td>\n",
       "      <td>5.216467e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226</th>\n",
       "      <td>f503_AMT_CREDIT_SUM_var</td>\n",
       "      <td>36</td>\n",
       "      <td>5.075450e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>f505_Active_AMT_CREDIT_SUM_diff_mean</td>\n",
       "      <td>43</td>\n",
       "      <td>5.050835e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228</th>\n",
       "      <td>f101_refused_DAYS_DECISION_var</td>\n",
       "      <td>27</td>\n",
       "      <td>4.875742e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td>f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...</td>\n",
       "      <td>34</td>\n",
       "      <td>4.577358e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230</th>\n",
       "      <td>f001_LIVINGAREA_MODE</td>\n",
       "      <td>35</td>\n",
       "      <td>4.296089e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_...</td>\n",
       "      <td>39</td>\n",
       "      <td>4.047823e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>232</th>\n",
       "      <td>f504_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var</td>\n",
       "      <td>29</td>\n",
       "      <td>3.878786e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>233</th>\n",
       "      <td>f305_delay_con_NUM_INSTALMENT_ratio_mean</td>\n",
       "      <td>34</td>\n",
       "      <td>3.709510e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>234</th>\n",
       "      <td>f502_AMT_CREDIT_SUM-d-debt-p-AMT_CREDIT_SUM_DE...</td>\n",
       "      <td>19</td>\n",
       "      <td>3.455408e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>f504_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff...</td>\n",
       "      <td>23</td>\n",
       "      <td>3.349408e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236</th>\n",
       "      <td>f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctc...</td>\n",
       "      <td>17</td>\n",
       "      <td>1.950240e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237</th>\n",
       "      <td>f505_Active_AMT_CREDIT_MAX_OVERDUE_pctchange_max</td>\n",
       "      <td>6</td>\n",
       "      <td>1.421239e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>f001_ENTRANCES_AVG</td>\n",
       "      <td>11</td>\n",
       "      <td>1.070976e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>f502_AMT_CREDIT_MAX_OVERDUE_sum</td>\n",
       "      <td>7</td>\n",
       "      <td>8.646801e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>f501_AMT_CREDIT_SUM_OVERDUE-d-app_AMT_INCOME_T...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>241 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               feature  split          gain\n",
       "0                       f601_Closed_MONTHS_BALANCE_sum   3117  2.197206e+06\n",
       "1                      f602_Active_MONTHS_BALANCE_mean   2134  6.617591e+05\n",
       "2    f505_Closed_AMT_CREDIT_SUM-d-app_AMT_INCOME_TO...    227  5.399114e+05\n",
       "3         f505_Active_DAYS_CREDIT-d-app_DAYS_BIRTH_min    307  1.641696e+05\n",
       "4                          f505_Active_DAYS_CREDIT_max    872  1.605563e+05\n",
       "5                      f105_prevapp_future_payment_21m    915  1.570887e+05\n",
       "6      f505_Closed_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_sum   1196  1.431801e+05\n",
       "7                                 f501_DAYS_CREDIT_min   1387  1.410532e+05\n",
       "8                      f105_prevapp_future_payment_20m    597  8.742117e+04\n",
       "9                   f505_Active_DAYS_CREDIT_UPDATE_var    876  6.358519e+04\n",
       "10   f505_Closed_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...     83  5.216048e+04\n",
       "11              f501_DAYS_CREDIT-d-app_DAYS_BIRTH_mean    427  5.153050e+04\n",
       "12   f310_notdelay_DAYS_ENTRY_PAYMENT-m-app_DAYS_BI...    636  4.570043e+04\n",
       "13          f501_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_mean     79  4.525381e+04\n",
       "14        f310_DAYS_ENTRY_PAYMENT-m-app_DAYS_BIRTH_var    768  4.176784e+04\n",
       "15                    f101_nyg-high_DAYS_DECISION_mean    732  3.128695e+04\n",
       "16                     f105_prevapp_future_payment_12m    651  3.097413e+04\n",
       "17                        f001_AMT_CREDIT-d-cnt_adults    678  3.039603e+04\n",
       "18                   f501_DAYS_CREDIT_UPDATE_diff_mean    619  2.927384e+04\n",
       "19   f501_DAYS_ENDDATE_FACT-d-app_DAYS_ID_PUBLISH_mean    768  2.771257e+04\n",
       "20                   f001_AMT_GOODS_PRICE-d-cnt_adults    596  2.644722e+04\n",
       "21             f313_days_delayed_payment_pctchange_min    181  2.611260e+04\n",
       "22          f305_con_AMT_PAYMENT-d-app_AMT_CREDIT_mean    239  2.536922e+04\n",
       "23                  f303_AMT_PAYMENT-d-AMT_ANNUITY_min    464  2.301015e+04\n",
       "24   f501_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_CHANGE_...    425  2.184166e+04\n",
       "25               f501_DAYS_CREDIT_UPDATE_pctchange_var    444  2.109488e+04\n",
       "26                    f305_cas_DAYS_ENTRY_PAYMENT_mean    494  1.870025e+04\n",
       "27                  f312_days_delayed_payment_diff_var    199  1.865849e+04\n",
       "28   f101_completed_AMT_APPLICATION-d-app_AMT_GOODS...    318  1.794695e+04\n",
       "29   f505_Active_DAYS_CREDIT-d-app_DAYS_LAST_PHONE_...    166  1.664402e+04\n",
       "..                                                 ...    ...           ...\n",
       "211  f501_AMT_CREDIT_SUM-d-app_AMT_GOODS_PRICE_diff...     52  6.507261e+02\n",
       "212          f502_DAYS_ENDDATE_FACT-m-DAYS_CREDIT_mean     41  6.463721e+02\n",
       "213           f001_DAYS_ID_PUBLISH-d-DAYS_REGISTRATION     56  6.437585e+02\n",
       "214           f501_DAYS_CREDIT-m-app_DAYS_EMPLOYED_min     48  6.007222e+02\n",
       "215   f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_EMPLOYED_min     43  5.831270e+02\n",
       "216                   f305_con_days_weighted_delay_min     47  5.675828e+02\n",
       "217  f501_DAYS_CREDIT_ENDDATE-m-app_DAYS_REGISTRATI...     49  5.632591e+02\n",
       "218  f503_AMT_CREDIT_SUM-d-app_AMT_ANNUITY_pctchang...     48  5.588450e+02\n",
       "219  f501_DAYS_CREDIT_ENDDATE-d-app_DAYS_REGISTRATI...     44  5.502528e+02\n",
       "220      f502_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_max     40  5.454684e+02\n",
       "221  f505_Active_DAYS_CREDIT-m-app_DAYS_REGISTRATIO...     50  5.420234e+02\n",
       "222                                f001_LIVINGAREA_AVG     40  5.409238e+02\n",
       "223          f101_nyg-high_HOUR_APPR_PROCESS_START_max     30  5.306953e+02\n",
       "224              f503_DAYS_CREDIT-d-app_DAYS_BIRTH_min     40  5.260760e+02\n",
       "225  f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...     42  5.216467e+02\n",
       "226                            f503_AMT_CREDIT_SUM_var     36  5.075450e+02\n",
       "227               f505_Active_AMT_CREDIT_SUM_diff_mean     43  5.050835e+02\n",
       "228                     f101_refused_DAYS_DECISION_var     27  4.875742e+02\n",
       "229  f505_Active_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DE...     34  4.577358e+02\n",
       "230                               f001_LIVINGAREA_MODE     35  4.296089e+02\n",
       "231  f505_Closed_DAYS_CREDIT_ENDDATE-d-app_DAYS_ID_...     39  4.047823e+02\n",
       "232         f504_DAYS_CREDIT_ENDDATE-m-DAYS_CREDIT_var     29  3.878786e+02\n",
       "233           f305_delay_con_NUM_INSTALMENT_ratio_mean     34  3.709510e+02\n",
       "234  f502_AMT_CREDIT_SUM-d-debt-p-AMT_CREDIT_SUM_DE...     19  3.455408e+02\n",
       "235  f504_AMT_CREDIT_SUM_DEBT-d-AMT_CREDIT_SUM_diff...     23  3.349408e+02\n",
       "236  f504_AMT_CREDIT_SUM-m-AMT_CREDIT_SUM_DEBT_pctc...     17  1.950240e+02\n",
       "237   f505_Active_AMT_CREDIT_MAX_OVERDUE_pctchange_max      6  1.421239e+02\n",
       "238                                 f001_ENTRANCES_AVG     11  1.070976e+02\n",
       "239                    f502_AMT_CREDIT_MAX_OVERDUE_sum      7  8.646801e+01\n",
       "240  f501_AMT_CREDIT_SUM_OVERDUE-d-app_AMT_INCOME_T...      0  0.000000e+00\n",
       "\n",
       "[241 rows x 3 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "imp_ovf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
